*Code to reproduce results of "Work from Home & Productivity: Evidence from Personnel & Analytics Data on IT Professionals", 2022, forthcoming at JPE Micro

*Set the working directory; every relative path used below (input workbooks, ./tables, ./graphs) resolves against it.
*NOTE(review): machine-specific absolute path -- adjust before running on another computer.
cd "C:\Dropbox\HCL WFH"

*Load the WPA workbook (first row holds the variable names) and store it as temp.dta.
*temp.dta is not read again in this part of the file; presumably it feeds the WPA analysis further down -- confirm.
import excel using "01.2020-08.2020.WPA.xlsx", clear firstrow
*drop unneeded variables
drop DB DC Location CQ
save temp.dta, replace


*Import and format commute time
*Result: temp2.dta with MaskedName, Distance (numeric, " mi" suffix stripped) and CommuteTime (numeric, in hours).
import excel using "CommuteTime_Professors_v1.0.xlsx", clear firstrow
*discard implausible routes: flagged as more than one day, distance of "1 ft", or a duration expressed in days
drop if Distancemorethan1day!=""|Distance=="1 ft"|strpos(CommuteTime,"day")>0
replace Distance="" if Distance=="Not Found"|Distance=="Errors"
replace CommuteTime="" if CommuteTime=="NA"
replace Distance=subinstr(Distance," mi","",.)
destring Distance, replace

*Parse "<h> hour(s) <m> min(s)" with regular expressions instead of fixed character offsets.
*The former offset rule substr(CommuteTime,strpos(CommuteTime,"min")-3,2) asks for start position 0 on strings such as "5 mins",
*and left minutes missing for durations without a minute part (e.g. "1 hour"); both cases ended up with a missing commute time
*and were then silently removed by the ">2" filter below. A component that is absent now counts as zero.
gen hours=0
replace hours=real(regexs(1)) if regexm(CommuteTime,"([0-9]+) *hour")
gen mins=0
replace mins=real(regexs(1)) if regexm(CommuteTime,"([0-9]+) *min")
*strings without any hour/minute information (including empty ones) stay missing
gen CommuteHours=hours+mins/60 if strpos(CommuteTime,"hour")>0|strpos(CommuteTime,"min")>0 //in hours, like our input measure
drop CommuteTime hours mins
rename CommuteHours CommuteTime
*drop commutes above two hours; missing values compare as larger than any number, so rows without a commute time are dropped as well
drop if CommuteTime>2
keep MaskedName CommuteTime Distance
save temp2.dta, replace

*Import and prepare sapience and demographics data
import excel using "04.2019-08.2020.demographics.salary.performance.xlsx", clear firstrow

*add commuting time: keep every row of the main file, discard commute records without a counterpart
*(the merge runs on the original string MaskedName, before the conversion below)
merge m:1 MaskedName using temp2.dta, keep(master match) nogenerate

*formatting: strip the first "A" from the employee ID and the "Customer " label from the team name
replace MaskedName=subinstr(MaskedName,"A","",1)
gen customerID=subinstr(MaskedCustomerName,"Customer ","",1)
*id 0 for "no customer team" in order not to lose observations
replace customerID="0" if missing(customerID)
*convert both identifiers to numeric
destring MaskedName customerID, replace


*Recode the month labels ("Apr'19" ... "Aug'20") to "YY-MM" strings so that alphabetical order equals chronological order.
*Labels outside April 2019 - August 2020 are left unchanged.
replace Month="19-04" if Month=="Apr'19"
replace Month="19-05" if Month=="May'19"
replace Month="19-06" if Month=="Jun'19"
replace Month="19-07" if Month=="Jul'19"
replace Month="19-08" if Month=="Aug'19"
replace Month="19-09" if Month=="Sep'19"
replace Month="19-10" if Month=="Oct'19"
replace Month="19-11" if Month=="Nov'19"
replace Month="19-12" if Month=="Dec'19"
replace Month="20-01" if Month=="Jan'20"
replace Month="20-02" if Month=="Feb'20"
replace Month="20-03" if Month=="Mar'20"
replace Month="20-04" if Month=="Apr'20"
replace Month="20-05" if Month=="May'20"
replace Month="20-06" if Month=="Jun'20"
replace Month="20-07" if Month=="Jul'20"
replace Month="20-08" if Month=="Aug'20"

*encode numbers the distinct Month strings in alphabetical order. Provided all 17 months occur in the data,
*this gives 1 = April 2019, ..., 12 = March 2020, ..., 17 = August 2020; the code further down relies on 12 being March 2020.
*NOTE(review): if a month were absent from the data, all later codes would shift -- confirm the panel covers every month.
encode Month, gen(monthnumber)  
*month index shifted so that code 12 becomes 0
gen monthnumbercentered=monthnumber-12

*generate month dummies to control for seasonal effects
*monthdummy`m' flags calendar month m: January-March occur only in 2020, April-August in both years, September-December only in 2019.
*The dummies are created in the order 1..12 because later varlists use the range monthdummy1-monthdummy11.
forvalues m=1/12 {
	local mm=string(`m',"%02.0f")
	if inrange(`m',4,8) {
		gen monthdummy`m'=inlist(Month,"20-`mm'","19-`mm'")
	}
	else if `m'<=3 {
		gen monthdummy`m'=(Month=="20-`mm'")
	}
	else {
		gen monthdummy`m'=(Month=="19-`mm'")
	}
}

*male indicator; left missing when gender is not recorded
gen maledummy=(Gender=="Male") if Gender!=""
drop Gender

*number of kids: the raw category "NA" pools zeros and those without information, so it is coded as 0
rename NoofKids numkids
replace numkids="0" if numkids=="NA"
destring numkids, replace
*indicator for at least one child; missing when the number of kids is missing
gen kidsathome=(numkids>0) if numkids!=.


*normalize time spent working by number of working days in India by month (some months are longer [jan vs feb], some have slightly more weekend days, etc.). Data from https://www.workingdays.in/#y2m3
*lookup list of "YY-MM  working days" pairs; months not listed keep workdays=0
gen workdays=0
local calendar 19-04 21 19-05 23 19-06 20 19-07 23 19-08 21 19-09 21 19-10 22 19-11 21 19-12 20 20-01 21 20-02 19 20-03 22 20-04 20 20-05 21 20-06 22 20-07 23 20-08 21
tokenize `calendar'
while "`1'"!="" {
	replace workdays=`2' if Month=="`1'"
	macro shift 2
}

*Month not month-year: keep the two-digit calendar month and make it numeric
replace Month=substr(Month,4,2)
destring Month, replace

*hours worked per working day of the month
gen AverageTimeSpent=TotalTimeSpentHrs/workdays

*generate variable tracking the number of months the employee has been in the sample
bysort MaskedName (monthnumber): gen EmployeeMonth=_n

*WFH indicator: 0 before March 2020 (monthnumber<12), 1 afterwards.
*Missing values compare as larger than any number, so a missing monthnumber must be excluded explicitly;
*otherwise such rows would be classified as WFH=1.
gen WFH=0 if !missing(monthnumber)
replace WFH=1 if monthnumber>=12&!missing(monthnumber) //start in March 2020
replace WFH=. if monthnumber==12 //adjustment month, discard for analysis

*determine high/low performers and high/low time investors pre-WFH
*step 1: employee mean over the pre-WFH rows only; step 2: spread that mean to all rows of the employee
bysort MaskedName: egen meanKPIt=mean(KPIValue) if WFH==0
bysort MaskedName: egen meanKPI=mean(meanKPIt)
bysort MaskedName: egen meanTimePerDayt=mean(AverageTimeSpent) if WFH==0
bysort MaskedName: egen meanTimePerDay=mean(meanTimePerDayt)

*Quartile cut-offs are taken over one row per employee (EmployeeMonth==1).
*Employees without a pre-WFH mean get a missing classification instead of being counted as "High"
*(missing>=p75 evaluates to true), in line with how HighTenure/HighAge/HighPay treat missing values.
summarize meanKPI if EmployeeMonth==1, detail //generate percentiles of mean KPI among employees prior to WFH
gen LowKPI=(meanKPI<=`r(p25)') if !missing(meanKPI)
gen HighKPI=(meanKPI>=`r(p75)') if !missing(meanKPI)

summarize meanTimePerDay if EmployeeMonth==1, detail
gen LowTimeInvestment=(meanTimePerDay<=`r(p25)') if !missing(meanTimePerDay)
gen HighTimeInvestment=(meanTimePerDay>=`r(p75)') if !missing(meanTimePerDay)

*create standard productivity measure output/time
*(KPI value per hour worked in the month; missing when either input is missing or hours are zero)
gen productivity=KPIValue/TotalTimeSpentHrs

*Create outcomes counting commute time as work time
*For pre-WFH rows (WFH==0) a round trip (2*CommuteTime, in hours) per working day is added to the time worked.
*Rows with WFH==1 or WFH==. keep the unadjusted value; WFH==0 rows without a commute time become missing.
gen productivityCommuteAdjusted=productivity
replace productivityCommuteAdjusted=KPIValue/(TotalTimeSpentHrs+2*CommuteTime*workdays) if WFH==0
gen AverageTimeSpentCommuteAdjusted=TotalTimeSpentHrs/workdays
replace AverageTimeSpentCommuteAdjusted=AverageTimeSpentCommuteAdjusted+CommuteTime*2 if WFH==0

*Reformat performance and salary variables
*Keeps only the second character of each label and converts it to a number.
*NOTE(review): assumes the code is a single digit at position 2 of the raw label -- confirm against the raw data.
replace PerformanceRatings=substr(PerformanceRatings,2,1)
replace SalaryBracket=substr(SalaryBracket,2,1)
destring PerformanceRatings SalaryBracket, replace

*remove raw columns and the helper means that are no longer needed
drop Color KPIName CategoryName Weightage MaskedCustomerName meanKPI meanTimePerDay meanKPIt meanTimePerDayt MaskedEmailId MaskedSAPId Productivity

*Impute missing demographics from other non-missing months
*Pass 1 (sorted by monthnumber) carries the last known value forward within employee;
*pass 2 (sorted by negmonth, i.e. reverse time) fills what is still missing from later months.
local demovars AgeInYears TenureatHCLInYears TotalExperienceInYears ReleveantExperienceinYears numkids kidsathome CurrentWorkingLocation SalaryBracket PerformanceRatings RMMAskedIds maledummy
gen negmonth=-monthnumber
foreach sortkey in monthnumber negmonth {
	foreach v of local demovars {
		bysort MaskedName (`sortkey'): replace `v'=`v'[_n-1] if missing(`v')&MaskedName!=.
	}
}
drop negmonth

*Median splits
*Medians are computed over one row per employee (EmployeeMonth==1); each indicator is missing when its source variable is missing.
quietly sum TenureatHCLInYears if EmployeeMonth==1, detail
gen HighTenure=TenureatHCLInYears>=r(p50)
replace HighTenure=. if TenureatHCLInYears==.

quietly sum AgeInYears if EmployeeMonth==1, detail
gen HighAge=AgeInYears>=r(p50)
replace HighAge=. if AgeInYears==.

quietly sum ReleveantExperienceinYears if EmployeeMonth==1, detail
gen HighExperience=ReleveantExperienceinYears>=r(p50)
replace HighExperience=. if ReleveantExperienceinYears==.

*salary bracket above 4 counts as high pay
gen HighPay=(SalaryBracket>4)
replace HighPay=. if SalaryBracket==.

*Target reached when the KPI value is at least 100.
*A missing KPIValue must not count as "target hit" (missing>=100 evaluates to true), so it is left missing,
*consistent with the indicators above.
gen HitTarget=(KPIValue>=100)
replace HitTarget=. if KPIValue==.

save "productivitydataset.dta", replace


******************************
* SUMMARY STATISTICS
******************************
use productivitydataset.dta, clear

*TABLE 1: For outcome variables, pre-WFH vs WFH
*Mean, SD, quartiles and N of input, output and productivity, separately for WFH==0 and WFH==1
*(rows with missing WFH, i.e. the adjustment month, fall into neither group). Written to ./tables/summaryoutcome.tex.
estpost  tabstat AverageTimeSpent KPIValue productivity, statistics( mean sd p25 p75 count) by(WFH) nototal  columns(statistics)
esttab . using "./tables/summaryoutcome.tex" , cells("mean(fmt(%5.2f)) sd(fmt(%5.2f)) p25 p75 count(fmt(%5.0f))") collabels("Mean" "SD" "1st Quartile" "3rd Quartile" "N") rename(AverageTimeSpent "Input" KPIValue "Output" productivity Productivity) nomtitle nonumber noobs booktabs replace


*TABLE 2: For demographic/HR variables...we have them only once
*Uses one row per employee (EmployeeMonth==1). Written to ./tables/summarydemographics.tex.
use productivitydataset.dta, clear

estpost  tabstat AgeInYears HighAge TenureatHCLInYears HighTenure ReleveantExperienceinYears HighExperience maledummy numkids kidsathome CommuteTime PerformanceRatings if EmployeeMonth==1, statistics( mean sd p25 p75 count)  columns(statistics)
esttab . using "./tables/summarydemographics.tex", cells("mean(fmt(%5.2f)) sd(fmt(%5.2f)) p25(fmt(%5.2f)) p75(fmt(%5.2f)) count(fmt(%5.0f))") collabels("Mean" "SD" "1st Quartile" "3rd Quartile" "N") rename(AgeInYears "Age (in years)"  TenureatHCLInYears "Tenure (in years)" ReleveantExperienceinYears "Experience (in years)" maledummy Male numkids "NumChildren" kidsathome "Children" PerformanceRatings "Rating") nomtitle nonumber booktabs noobs replace


******************************
* PLOTS FOR RAW DATA OVER TIME
******************************
use productivitydataset.dta, clear


*FIGURE 1a: Input: no outliers
*Month-by-month means of hours per working day (with confidence intervals clustered by employee),
*using only observations strictly between the 0.1th and 99.9th percentile.
_pctile AverageTimeSpent, p(0.1, 99.9)
gen sample=(AverageTimeSpent>r(r1)&AverageTimeSpent<r(r2))
quietly reg AverageTimeSpent i.monthnumber if sample==1, vce(cluster MaskedName)
margins, at(monthnumber==(1(1)17))
marginsplot, xline(12, lcolor(black)) nolabels 
*cosmetics: white background, axis title, thicker lines, no graph title
gr_edit .style.editstyle boxstyle(shadestyle(color(white))) editcopy
gr_edit .style.editstyle boxstyle(linestyle(color(white))) editcopy
gr_edit .yaxis1.title.text = {}
gr_edit .yaxis1.title.text.Arrpush Time worked per working day
gr_edit .plotregion1.plot1.style.editstyle area(linestyle(width(medthick))) editcopy
gr_edit .plotregion1.plot2.style.editstyle line(width(medthick)) editcopy
gr_edit .title.text = {}
gr_edit .title.text.Arrpush 
*relabel the x-axis ticks 1..17 as months relative to March 2020 (-11..5)
forvalues tick=1/17 {
	local rel=`tick'-12
	gr_edit .xaxis1.edit_tick `tick' `tick' `"`rel'"', tickset(major)
}
graph export "./graphs/graph-timebyworkingday-nooutlier.eps", replace as(eps)


*FIGURE 1b: Output: KPI without outliers
*Same construction as the input figure, for the KPI value; reuses the existing variable "sample".
_pctile KPIValue, p(0.1, 99.9)
replace sample=(KPIValue>r(r1)&KPIValue<r(r2))
quietly reg KPIValue i.monthnumber if sample==1, vce(cluster MaskedName)
margins, at(monthnumber==(1(1)17)) noestimcheck
marginsplot , xline(12, lcolor(black)) nolabels  yscale(r(99 102)) ylabel(#5)
*cosmetics: white background, axis title, thicker lines, no graph title
gr_edit .style.editstyle boxstyle(shadestyle(color(white))) editcopy
gr_edit .style.editstyle boxstyle(linestyle(color(white))) editcopy
gr_edit .yaxis1.title.text = {}
gr_edit .yaxis1.title.text.Arrpush Output by month
gr_edit .plotregion1.plot1.style.editstyle area(linestyle(width(medthick))) editcopy
gr_edit .plotregion1.plot2.style.editstyle line(width(medthick)) editcopy
gr_edit .title.text = {}
gr_edit .title.text.Arrpush 
*relabel the x-axis ticks 1..17 as months relative to March 2020 (-11..5)
forvalues tick=1/17 {
	local rel=`tick'-12
	gr_edit .xaxis1.edit_tick `tick' `tick' `"`rel'"', tickset(major)
}
graph export "./graphs/graph-outputbymonth-nooutlier.eps", replace as(eps)


*FIGURE 1c: Productivity, no outliers
*Same construction as the previous figures, for productivity; reuses the existing variable "sample".
_pctile productivity, p(0.1, 99.9)
replace sample=(productivity>r(r1)&productivity<r(r2))
quietly reg productivity i.monthnumber if sample==1, vce(cluster MaskedName)
margins, at(monthnumber==(1(1)17))
marginsplot, xline(12, lcolor(black)) nolabels 
*cosmetics: white background, axis title, thicker lines, no graph title
gr_edit .style.editstyle boxstyle(shadestyle(color(white))) editcopy
gr_edit .style.editstyle boxstyle(linestyle(color(white))) editcopy
gr_edit .yaxis1.title.text = {}
gr_edit .yaxis1.title.text.Arrpush Productivity
gr_edit .plotregion1.plot1.style.editstyle area(linestyle(width(medthick))) editcopy
gr_edit .plotregion1.plot2.style.editstyle line(width(medthick)) editcopy
gr_edit .title.text = {}
gr_edit .title.text.Arrpush  
*relabel the x-axis ticks 1..17 as months relative to March 2020 (-11..5)
forvalues tick=1/17 {
	local rel=`tick'-12
	gr_edit .xaxis1.edit_tick `tick' `tick' `"`rel'"', tickset(major)
}
graph export "./graphs/graph-productivitybymonth-nooutlier.eps", replace as(eps)


*FIGURE 1d: Log Productivity, no outliers
*Same construction as the previous figures, for the natural log of productivity; reuses the existing variable "sample".
gen LogProductivity=log(productivity)
_pctile LogProductivity, p(0.1, 99.9)
replace sample=(LogProductivity>r(r1)&LogProductivity<r(r2))
quietly reg LogProductivity i.monthnumber if sample==1, vce(cluster MaskedName)
margins, at(monthnumber==(1(1)17))
marginsplot, xline(12, lcolor(black)) nolabels 
*cosmetics: white background, axis title, thicker lines, no graph title
gr_edit .style.editstyle boxstyle(shadestyle(color(white))) editcopy
gr_edit .style.editstyle boxstyle(linestyle(color(white))) editcopy
gr_edit .yaxis1.title.text = {}
gr_edit .yaxis1.title.text.Arrpush Log(Productivity)
gr_edit .plotregion1.plot1.style.editstyle area(linestyle(width(medthick))) editcopy
gr_edit .plotregion1.plot2.style.editstyle line(width(medthick)) editcopy
gr_edit .title.text = {}
gr_edit .title.text.Arrpush  
*relabel the x-axis ticks 1..17 as months relative to March 2020 (-11..5)
forvalues tick=1/17 {
	local rel=`tick'-12
	gr_edit .xaxis1.edit_tick `tick' `tick' `"`rel'"', tickset(major)
}
graph export "./graphs/graph-logproductivitybymonth-nooutlier.eps", replace as(eps)


***************************
* TABLE 4: AVERAGE EFFECT OF WFH (excluding March 2020, since WFH=. in that month)
***************************
use productivitydataset.dta, clear
xtset MaskedName
gen LogProductivity=log(productivity)

eststo clear
*For each outcome (input, output, productivity, log productivity) two employee fixed-effects models are stored:
*first with calendar-month dummies, then with a linear month trend; both include team (customerID) dummies.
foreach outcome in AverageTimeSpent KPIValue productivity LogProductivity {
	eststo: xtreg `outcome' WFH i.Month i.customerID if monthnumber!=12, vce(cluster MaskedName) fe
	eststo: xtreg `outcome' WFH c.monthnumber i.customerID if monthnumber!=12, vce(cluster MaskedName) fe
}

*export the eight stored models as a LaTeX table
	#delimit ;
	esttab using "./tables/AverageWFHeffect.tex", 
	cells(b(star fmt(%9.3f)) se(par fmt(%9.3f) )) starlevels(* .05 ** 0.01 *** .001) 
	stats(r2 N N_clust, fmt(%9.2f %9.0f %9.0f) labels(R$^2$ Observations Clusters))	
	keep( WFH monthnumber)
	varlabels(WFH "WFH" monthnumber "Linear month trend",
	elist( monthnumber "\midrule Employee FE &Yes&Yes&Yes&Yes&Yes&Yes&Yes&Yes \\ Team FE &Yes&Yes&Yes&Yes&Yes&Yes&Yes&Yes \\ \\ Month FE &Yes&No&Yes&No&Yes&No&Yes&No \\" )) 
	 nonumbers collabels(,none)  mlabels("(1)" "(2)" "(3)" "(4)" "(5)" "(6)" "(7)" "(8)") 
	prehead("\begin{table}[t]" "\footnotesize" "\caption{Average Working-From-Home effect}" "\label{averageWFHeffect}"
	"\begin{center}" "\begin{tabular}{lcccccccc}" 
	"\toprule") posthead("[3mm] Dependent variable &  Input  & Input  & Output & Output & Productivity & Productivity &LogProd&LogProd  \\" "\midrule")  prefoot("") 
	postfoot("\bottomrule" "\end{tabular}" "\\ [2mm] \begin{minipage}{\textwidth}" 
	"\footnotesize" "{\it Note:} 
	Input is the individual time in hours that the employee worked per working day in a month. Output is the normalized output of the employee relative to the target in a month.
	Productivity is output divided time worked. LogProd is the natural logarithm of Productivity. The unit of observation is the employee-month. Standard errors are shown in brackets below the point estimates, and are clustered on employee level.
	***Significant at the 0.1\% level; **significant at the 1\% level; *significant at the 5\% level.                           
	
	" "\end{minipage}" 
	"\end{center}" "\end{table}") style(tex) replace
;
#delimit cr


************************
* TABLE 5: HETEROGENEOUS WFH EFFECTS: Kids and gender
************************
use productivitydataset.dta, clear
xtset MaskedName
eststo clear

*building blocks shared by the six models
local kids 1.WFH#1.kidsathome
local kidsgender 1.WFH#1.maledummy 1.WFH#1.kidsathome 1.WFH#1.maledummy#1.kidsathome
local trendctrl c.monthnumber i.customerID
local monthctrl monthdummy1-monthdummy11 i.customerID
local spec if monthnumber!=12, vce(cluster MaskedName) fe

*Input models use a linear month trend; output and productivity models use month dummies
eststo: xtreg AverageTimeSpent WFH `kids' `trendctrl' `spec'
eststo: xtreg AverageTimeSpent WFH `kidsgender' `trendctrl' `spec'
eststo: xtreg KPIValue WFH `kids' `monthctrl' `spec'
eststo: xtreg KPIValue WFH `kidsgender' `monthctrl' `spec'
eststo: xtreg productivity WFH `kids' `monthctrl' `spec'
eststo: xtreg productivity WFH `kidsgender' `monthctrl' `spec'

*export the six stored models as a LaTeX table
	#delimit ;
	esttab using "./tables/HeterogeneousWFHeffect-ChildrenGender.tex", 
	cells(b(star fmt(%9.3f)) se(par fmt(%9.3f) )) starlevels(* .05 ** 0.01 *** .001) 
	stats(r2 N N_clust, fmt(%9.2f %9.0f %9.0f) labels(R$^2$ Observations Clusters))	
	keep( WFH 1.WFH#1.kidsathome 1.WFH#1.maledummy 1.WFH#1.maledummy#1.kidsathome )
	varlabels(WFH "WFH" 1.WFH#1.kidsathome "WFH $\times$ Children" 1.WFH#1.maledummy "WFH $\times$ Male" 1.WFH#1.maledummy#1.kidsathome "WFH $\times$ Male $\times$ Children" ,
	elist( 1.WFH#1.maledummy#1.kidsathome "\midrule Employee FE &Yes&Yes&Yes&Yes&Yes &Yes \\ Team FE &Yes&Yes&Yes&Yes&Yes&Yes \\Month FE &No&No&Yes&Yes&Yes&Yes \\ Linear Month Trend &Yes&Yes&No&No&No&No \\" )) 
	 nonumbers collabels(,none)  mlabels("(1)" "(2)" "(3)" "(4)" "(5)" "(6)") 
	prehead("\begin{table}[t]" "\footnotesize" "\caption{Working-From-Home: Children at home and gender differences}" "\label{heterogeneousWFHeffect-ChildrenGender}"
	"\begin{center}" "\begin{tabular}{lcccccc}" 
	"\toprule") posthead("[3mm] Dependent variable &  Input  & Input  & Output & Output & Productivity&Productivity \\" "\midrule")  prefoot("") 
	postfoot("\bottomrule" "\end{tabular}" "\\ [2mm] \begin{minipage}{\textwidth}" 
	"\footnotesize" "{\it Note:} 
	Input is the individual time in hours that the employee worked per working day in a month. Output is the normalized output of the employee relative to the target in a month.
	Productivity is output divided time worked. The unit of observation is the employee-month. Standard errors are shown in brackets below the point estimates, and are clustered on employee level.
	***Significant at the 0.1\% level; **significant at the 1\% level; *significant at the 5\% level.                           
	
	" "\end{minipage}" 
	"\end{center}" "\end{table}") style(tex) replace
;
#delimit cr


**************************************
* TABLE 6: HETEROGENEOUS WFH EFFECTS: age, experience, tenure, commute...
**************************************
use productivitydataset.dta, clear
xtset MaskedName
eststo clear

*building blocks shared by the six models
local seniority 1.WFH#1.HighTenure 1.WFH#1.HighAge 1.WFH#1.HighExperience
local commute 1.WFH#c.CommuteTime
local trendctrl c.monthnumber i.customerID
local monthctrl monthdummy1-monthdummy11 i.customerID
local spec if monthnumber!=12, vce(cluster MaskedName) fe

*Input models use a linear month trend; output and productivity models use month dummies
eststo: xtreg AverageTimeSpent WFH `seniority' `trendctrl' `spec'
eststo: xtreg AverageTimeSpent WFH `commute' `trendctrl' `spec'

eststo: xtreg KPIValue WFH `seniority' `monthctrl' `spec'
eststo: xtreg KPIValue WFH `commute' `monthctrl' `spec'

eststo: xtreg productivity WFH `seniority' `monthctrl' `spec'
eststo: xtreg productivity WFH `commute' `monthctrl' `spec'

*export the six stored models as a LaTeX table
	#delimit ;
	esttab using "./tables/HeterogeneousWFHeffect-ExperienceCommute.tex", 
	cells(b(star fmt(%9.3f)) se(par fmt(%9.3f) )) starlevels(* .05 ** 0.01 *** .001) 
	stats(r2 N N_clust, fmt(%9.2f %9.0f %9.0f) labels(R$^2$ Observations Clusters))	
	keep( WFH 1.WFH#1.HighTenure 1.WFH#1.HighAge 1.WFH#1.HighExperience 1.WFH#c.CommuteTime )
	varlabels(WFH "WFH" 1.WFH#1.HighTenure "WFH $\times$ HighTenure" 1.WFH#1.HighAge "WFH $\times$ HighAge" 1.WFH#1.HighExperience "WFH $\times$ HighExperience" 1.WFH#c.CommuteTime "WFH $\times$ CommuteTime" ,
	elist( 1.WFH#c.CommuteTime "\midrule Employee FE &Yes&Yes&Yes&Yes&Yes &Yes \\ Team FE &Yes&Yes&Yes&Yes&Yes&Yes \\Month FE &No&No&Yes&Yes&Yes&Yes \\ Linear Month Trend &Yes&Yes&No&No&No&No \\" )) 
	 nonumbers collabels(,none)  mlabels("(1)" "(2)" "(3)" "(4)" "(5)" "(6)") 
	prehead("\begin{table}[t]" "\footnotesize" "\caption{Working-From-Home: Age, experience, tenure, commute times}" "\label{heterogeneousWFHeffect-ExperienceCommute}"
	"\begin{center}" "\begin{tabular}{lcccccc}" 
	"\toprule") posthead("[3mm] Dependent variable &  Input  & Input  & Output & Output & Productivity&Productivity \\" "\midrule")  prefoot("") 
	postfoot("\bottomrule" "\end{tabular}" "\\ [2mm] \begin{minipage}{\textwidth}" 
	"\footnotesize" "{\it Note:} 
	Input is the individual time in hours that the employee worked per working day in a month. Output is the normalized output of the employee relative to the target in a month.
	Productivity is output divided time worked. The unit of observation is the employee-month. Standard errors are shown in brackets below the point estimates, and are clustered on employee level.
	***Significant at the 0.1\% level; **significant at the 1\% level; *significant at the 5\% level.                           
	
	" "\end{minipage}" 
	"\end{center}" "\end{table}") style(tex) replace
;
#delimit cr




********************************
* APPENDIX MATERIALS
********************************


***********************************
* TABLE A.1: Do productivity, output or working time predict the employee rating?
***********************************
use productivitydataset.dta, clear
*Our rating is from May/June 2020. So let us use an average of the outcomes from January 2020 to May 2020.
*monthnumber codes: 5 = August 2019, 10 = January 2020, 14 = May 2020.
*5-month window: January 2020 - May 2020 (codes 10-14).
*10-month window: August 2019 - May 2020 (codes 5-14). The earlier bounds (monthnumber>2&monthnumber<15) covered
*codes 3-14, i.e. twelve months, which contradicted the variable name and the "most recent X months" table note.
gen RatingTimeWindow5Months=inrange(monthnumber,10,14)
gen RatingTimeWindow10Months=inrange(monthnumber,5,14)
*employee means of the outcomes within (and, separately, outside) each window
bysort MaskedName RatingTimeWindow5Months: egen MeanProductivity5=mean(productivity)
bysort MaskedName RatingTimeWindow5Months: egen MeanOutput5=mean(KPIValue)
bysort MaskedName RatingTimeWindow5Months: egen MeanInput5=mean(AverageTimeSpent)
bysort MaskedName RatingTimeWindow10Months: egen MeanProductivity10=mean(productivity)
bysort MaskedName RatingTimeWindow10Months: egen MeanOutput10=mean(KPIValue)
bysort MaskedName RatingTimeWindow10Months: egen MeanInput10=mean(AverageTimeSpent)

*row counter within employee and window, used below to keep one observation per employee
bysort MaskedName RatingTimeWindow10Months (monthnumber): gen indmonth10=_n
bysort MaskedName RatingTimeWindow5Months (monthnumber): gen indmonth5=_n

*OLS and ordered logit of the rating on the window means, one observation per employee
eststo clear
eststo: reg PerformanceRatings MeanInput5 MeanOutput5  MeanProductivity5 if RatingTimeWindow5Months==1&indmonth5==1, vce(robust)
eststo: ologit PerformanceRatings MeanInput5 MeanOutput5  MeanProductivity5 if RatingTimeWindow5Months==1&indmonth5==1,vce(robust)
eststo: reg PerformanceRatings MeanInput10 MeanOutput10  MeanProductivity10 if RatingTimeWindow10Months==1&indmonth10==1,vce(robust)
eststo: ologit PerformanceRatings MeanInput10 MeanOutput10  MeanProductivity10 if RatingTimeWindow10Months==1&indmonth10==1,vce(robust)

	#delimit ;
	esttab using "./tables/Robustness-PredictRating.tex", 
	cells(b(star fmt(%9.3f)) se(par fmt(%9.3f) )) starlevels(* .05 ** 0.01 *** .001) 
	stats(r2 N , fmt(%9.2f %9.0f ) labels(R$^2$ Observations ))	
	varlabels( _cons "Constant" ,	elist( _cons "\midrule " )) 
	drop(cut1 cut2 cut3 cut4 )
	 nonumbers collabels(,none)  mlabels("(1) OLS" "(2) Ordered Logit" "(3) OLS" "(4) Ordered Logit") 
	prehead("\begin{table}[h]" "\footnotesize" "\caption{Do the Sapience outcome measures predict peformance evaluations?}" "\label{robustness-predictrating}"
	"\begin{center}" "\begin{tabular}{lcccc}" 
	"\toprule") posthead("[3mm] Dependent variable &  Rating  & Rating  & Rating & Rating \\" "\midrule")  prefoot("") 
	postfoot("\bottomrule" "\end{tabular}" "\\ [2mm] \begin{minipage}{\textwidth}" 
	"\footnotesize" "{\it Note:} 
	MeanInputX is the average of Input (hours worked) over the most recent X months prior to the performance rating. Similarly, MeanOutputX and MeanProductivityX are the averages of Output and Productivity, respectively, over the most recent X months prior to the performance rating. Rating takes integer values 1 to 5, with 1 being the best. Each observation is one employee. Heteroskedasticity-robust standard errors are shown in brackets below the point estimates.
	***Significant at the 0.1\% level; **significant at the 1\% level; *significant at the 5\% level.                           
	
	" "\end{minipage}" 
	"\end{center}" "\end{table}") style(tex) replace
;
#delimit cr

***************************
* TABLE A.5: FOR AVERAGE EFFECT OF WFH, truncating outliers
***************************
use productivitydataset.dta, clear
xtset MaskedName
gen LogProductivity=log(productivity)

eststo clear
*For each outcome (input, output, productivity, log productivity): keep observations between the 1st and 99th
*percentile of that outcome (percentiles computed without March 2020), then estimate the month-dummy model
*and the linear-trend model on that sample.
gen sample=.
foreach outcome in AverageTimeSpent KPIValue productivity LogProductivity {
	quietly sum `outcome' if monthnumber!=12, detail
	replace sample=(monthnumber!=12&`outcome'<=r(p99)&`outcome'>=r(p1))
	eststo: xtreg `outcome' WFH i.Month i.customerID if sample==1, vce(cluster MaskedName) fe
	eststo: xtreg `outcome' WFH c.monthnumber i.customerID if sample==1, vce(cluster MaskedName) fe
}

*export the eight stored models as a LaTeX table
	#delimit ;
	esttab using "./tables/AverageWFHeffect-truncated.tex", 
	cells(b(star fmt(%9.3f)) se(par fmt(%9.3f) )) starlevels(* .05 ** 0.01 *** .001) 
	stats(r2 N N_clust, fmt(%9.2f %9.0f %9.0f) labels(R$^2$ Observations Clusters))	
	keep( WFH monthnumber)
	varlabels(WFH "WFH" monthnumber "Linear month trend",
	elist( monthnumber "\midrule Employee FE &Yes&Yes&Yes&Yes&Yes&Yes&Yes&Yes \\ Team FE &Yes&Yes&Yes&Yes&Yes&Yes&Yes&Yes \\ Month FE &Yes&No&Yes&No&Yes&No&Yes&No \\" )) 
	 nonumbers collabels(,none)  mlabels("(1)" "(2)" "(3)" "(4)" "(5)" "(6)" "(7)" "(8)") 
	prehead("\begin{table}[t]" "\footnotesize" "\caption{Average Working-From-Home effect (top and bottom 1\% of outcomes truncated)}" "\label{averageWFHeffect-truncated}"
	"\begin{center}" "\begin{tabular}{lcccccccc}" 
	"\toprule") posthead("[3mm] Dependent variable &  Input  & Input  & Output & Output & Productivity & Productivity & LogProd & LogProd \\" "\midrule")  prefoot("") 
	postfoot("\bottomrule" "\end{tabular}" "\\ [2mm] \begin{minipage}{\textwidth}" 
	"\footnotesize" "{\it Note:} 
	Input is the individual time in hours that the employee worked per working day in a month. Output is the normalized output of the employee relative to the target in a month.
	Productivity is output divided time worked. LogProd is the natural logarithm of Productivity. The unit of observation is the employee-month. Standard errors are shown in brackets below the point estimates, and are clustered on employee level.
	The top 1\% and bottom 1\% of outcomes are discarded before running the regression to deal with potential outliers.
	***Significant at the 0.1\% level; **significant at the 1\% level; *significant at the 5\% level.                           
	
	" "\end{minipage}" 
	"\end{center}" "\end{table}") style(tex) replace
;
#delimit cr


********************
* TABLE A.6: MORE SICK DAYS DURING WFH?
********************
* Load the employee-month panel and build the outcome: DaysWorked relative to the
* number of work days in the month.
use productivitydataset.dta, clear
generate NonSickDayShare = DaysWorked / workdays

* Employee fixed effects; both columns share team dummies, the month-12 exclusion
* and employee-clustered standard errors, so that common tail is kept in a macro.
xtset MaskedName
eststo clear
local common_tail "i.customerID if monthnumber!=12, vce(cluster MaskedName) fe"

* Column 1: month dummies. Column 2: linear month trend.
eststo: xtreg NonSickDayShare WFH monthdummy1-monthdummy11 `common_tail'
eststo: xtreg NonSickDayShare WFH c.monthnumber `common_tail'

	* Export the stored sick-day regressions to ./tables/sickdays.tex as a 2-column
	* LaTeX table (column 1: month dummies, column 2: linear month trend). Only the WFH
	* and monthnumber coefficients are printed; the fixed-effects rows are hard-coded.
	* NOTE(review): the table note below states that the top and bottom 1% of outcomes
	* are discarded, but the regressions stored for this table are restricted only by
	* monthnumber!=12 -- confirm whether the note or the estimation sample is intended.
	#delimit ;
	esttab using "./tables/sickdays.tex", 
	cells(b(star fmt(%9.3f)) se(par fmt(%9.3f) )) starlevels(* .05 ** 0.01 *** .001) 
	stats(r2 N N_clust, fmt(%9.2f %9.0f %9.0f) labels(R$^2$ Observations Clusters))	
	keep( WFH monthnumber)
	varlabels(WFH "WFH" monthnumber "Linear month trend",
	elist( monthnumber "\midrule Employee FE &Yes&Yes \\ Team FE &Yes&Yes \\ Month FE &Yes&No \\" )) 
	 nonumbers collabels(,none)  mlabels("(1)" "(2)") 
	prehead("\begin{table}[t]" "\footnotesize" "\caption{WFH effect on (non-)sick days}" "\label{sickdays}"
	"\begin{center}" "\begin{tabular}{lcc}" 
	"\toprule") posthead("[3mm] Dependent variable &  NonSickDayShare  & NonSickDayShare \\" "\midrule")  prefoot("") 
	postfoot("\bottomrule" "\end{tabular}" "\\ [2mm] \begin{minipage}{\textwidth}" 
	"\footnotesize" "{\it Note:} 
	NonSickDayShare is the share of days in a month where the employee worked at least two hours, based on Sapience measurement, relative to the number of work days in that month. The unit of observation is the employee-month. Standard errors are shown in brackets below the point estimates, and are clustered on employee level.
	The top 1\% and bottom 1\% of outcomes are discarded before running the regression to deal with potential outliers.
	***Significant at the 0.1\% level; **significant at the 1\% level; *significant at the 5\% level.                           
	
	" "\end{minipage}" 
	"\end{center}" "\end{table}") style(tex) replace
;
#delimit cr

*****************************************************************************************************************
*** PART III: This part generates results reported in Section 3.3 (and the Appendix) which use WPA variables ****
*****************************************************************************************************************

use WFH_WPA_data.dta, clear

* Week index around the switch to WFH. The string variable date is matched against
* a fixed list of week dates (dd/mm/yyyy): listed pre-switch weeks get -1..-10,
* listed post-switch weeks get 1..24, and every other observation keeps time==0.
generate time = 0

* Pre-switch weeks, most recent first, numbered -1, -2, ..., -10.
local pre_weeks 08/03/2020 01/03/2020 23/02/2020 16/02/2020 09/02/2020 ///
                02/02/2020 26/01/2020 19/01/2020 12/01/2020 05/01/2020
local wk = 0
foreach d of local pre_weeks {
    local --wk
    replace time = `wk' if date == "`d'"
}

* Post-switch weeks in chronological order, numbered 1, 2, ..., 24.
local post_weeks 22/03/2020 29/03/2020 05/04/2020 12/04/2020 19/04/2020 26/04/2020 ///
                 03/05/2020 10/05/2020 17/05/2020 24/05/2020 31/05/2020 07/06/2020 ///
                 14/06/2020 21/06/2020 28/06/2020 05/07/2020 12/07/2020 19/07/2020 ///
                 26/07/2020 02/08/2020 09/08/2020 16/08/2020 23/08/2020 30/08/2020
local wk = 0
foreach d of local post_weeks {
    local ++wk
    replace time = `wk' if date == "`d'"
}

***********************************************
* Figure 3: Working patterns pre- and post WFH
***********************************************

* 3a

* Total working hours: the four in-working-hours components plus focus hours.
generate working_hours = working_hours_instant_messages ///
    + working_hours_in_calls ///
    + working_hours_email_hours ///
    + working_hours_collaboration_hour ///
    + total_focus_hours

* Mean, standard deviation and non-missing count of working hours within each week.
bysort time: egen mean_working_hours = mean(working_hours)
by time: egen sd_working_hours = sd(working_hours)
by time: egen n_working_hours = count(working_hours)

* 95% confidence band around the weekly mean: mean +/- t(n-1, 0.025) * sd/sqrt(n).
generate upper = mean_working_hours+invttail(n_working_hours-1,0.025)*(sd_working_hours/sqrt(n_working_hours))
generate lower = mean_working_hours-invttail(n_working_hours-1,0.025)*(sd_working_hours/sqrt(n_working_hours))

* Connected weekly means with capped CI spikes; vertical line at week 0.
twoway ///
    (connected mean_working_hours time, sort xline(0, lcolor(black)) xlabel(-10(2)24) ylabel(38(4)54) ytitle("Working Hours (Number of Hrs)") xtitle("Week")) ///
    (rcap upper lower time, lcolor(black) lwidth(vvthin)), ///
    leg(off)

graph save working_hours.gph, replace
graph export working_hours.png, replace

* The band variables are reused under the same names by the next panel.
drop upper lower

* 3b
* Total after-hours activity: sum of the five after-hours components.
generate after_hours = after_hours_meeting_hours ///
    + after_hours_instant_messages ///
    + after_hours_in_calls ///
    + after_hours_email_hours ///
    + after_hours_collaboration_hours

* Weekly mean, standard deviation and non-missing count (data must be sorted by time).
by time: egen mean_after_hours = mean(after_hours)
by time: egen sd_after_hours = sd(after_hours)
by time: egen n_after_hours = count(after_hours)

* 95% confidence band: mean +/- t(n-1, 0.025) * sd/sqrt(n).
generate upper = mean_after_hours+invttail(n_after_hours-1,0.025)*(sd_after_hours/sqrt(n_after_hours))
generate lower = mean_after_hours-invttail(n_after_hours-1,0.025)*(sd_after_hours/sqrt(n_after_hours))

* Connected weekly means with capped CI spikes; vertical line at week 0.
twoway ///
    (connected mean_after_hours time, sort xline(0, lcolor(black)) xlabel(-10(2)24) ylabel(0(2)16) ytitle("After Hours (Number of Hrs)") xtitle("Week")) ///
    (rcap upper lower time, lcolor(black) lwidth(vvthin)), ///
    leg(off)

graph save after_hours.gph, replace
graph export after_hours.png, replace

* Free the band names for the next panel.
drop upper lower

* 3c

* Weekly mean, standard deviation and non-missing count of total focus hours.
bysort time: egen mean_focus_hours = mean(total_focus_hours)
by time: egen sd_focus_hours = sd(total_focus_hours)
by time: egen n_focus_hours = count(total_focus_hours)

* 95% confidence band: mean +/- t(n-1, 0.025) * sd/sqrt(n).
generate upper = mean_focus_hours+invttail(n_focus_hours-1,0.025)*(sd_focus_hours/sqrt(n_focus_hours))
generate lower = mean_focus_hours-invttail(n_focus_hours-1,0.025)*(sd_focus_hours/sqrt(n_focus_hours))

* Connected weekly means with capped CI spikes; vertical line at week 0.
twoway ///
    (connected mean_focus_hours time, sort xline(0, lcolor(black)) xlabel(-10(2)24) ylabel(24(4)40) ytitle("Focus Hours (Number of Hrs)") xtitle("Week")) ///
    (rcap upper lower time, lcolor(black) lwidth(vvthin)), ///
    leg(off)

graph save focus_hours.gph, replace
graph export focus_hours.png, replace

* Free the band names for the next panel.
drop upper lower

* 3d

* Weekly mean, standard deviation and non-missing count of collaboration hours.
* All three statistics are taken from the same variable, collaboration_hours (the
* one used as outcome in the regressions), so that the confidence band belongs to
* the plotted mean. The mean was previously computed from collaboration_hrs while
* sd and n came from collaboration_hours.
* bysort makes the block independent of the sort order left by earlier panels.
bysort time: egen mean_collaboration_hours=mean(collaboration_hours)
by time: egen sd_collaboration_hours=sd(collaboration_hours)
by time: egen n_collaboration_hours=count(collaboration_hours)


* 95% confidence band: mean +/- t(n-1, 0.025) * sd/sqrt(n).
gen upper=mean_collaboration_hours+invttail(n_collaboration_hours-1,0.025)*(sd_collaboration_hours/sqrt(n_collaboration_hours))
gen lower=mean_collaboration_hours-invttail(n_collaboration_hours-1,0.025)*(sd_collaboration_hours/sqrt(n_collaboration_hours))


* Connected weekly means with capped CI spikes; vertical line at week 0.
twoway (connected mean_collaboration_hours time, sort xline(0, lcolor(black)) xlabel(-10(2)24) ylabel(8(4)20) ytitle("Collaboration Hours (Number of Hrs)") xtitle("Week")) (rcap upper lower time, lcolor(black) lwidth(vvthin)), leg(off)


graph save collaboration_hours.gph, replace
graph export collaboration_hours.png, replace

* Free the band names for later figures.
drop upper lower

************************************************
*Table 7: Shift in Working Patterns due to WFH
************************************************

* WFH indicator: 0 for weeks before the switch (time<0), 1 for weeks after (time>0).
* Observations with time==0 keep WFH missing and so drop out of any regression
* that includes WFH.
gen WFH=.
replace WFH=0 if time<0
replace WFH=1 if time>0

* Numeric employee and customer-team identifiers used for the fixed effects and
* for clustering. egen group() numbers the distinct values 1..K in sorted order,
* which is the same numbering the former tabulate-generated dummies produced, but
* without hard-coding the number of employees (915) or teams (31) and without
* creating and dropping hundreds of temporary dummy variables. Observations with a
* missing name get a missing id, as before.
egen id=group(maskedusername)
egen customer_team=group(maskedcustomername)

* For each outcome two columns are written to WPA_outcomes_id.tex: first without,
* then with the linear week trend. Both include customer-team and employee dummies
* and cluster standard errors by employee. The very first column creates the file
* (replace); every later column is appended.
local write_mode replace
foreach outcome in working_hours after_hours total_focus_hours collaboration_hours {
    reg `outcome' WFH i.customer_team i.id, cluster(id)
    outreg2 using WPA_outcomes_id.tex, `write_mode' keep(WFH)
    local write_mode append

    reg `outcome' WFH time i.customer_team i.id, cluster(id)
    outreg2 using WPA_outcomes_id.tex, append keep(WFH time)
}

*************************************************************************
* Table 8: Shift in Networking Patterns and types of meetings due to WFH
*************************************************************************

* Total e-mails sent: sum over the five recipient-count buckets.
generate all_emails = emails_sent_to_50_or_more_recipi ///
    + emails_sent_11_to_50_recipients ///
    + emails_sent_6_to_10_recipients ///
    + emails_sent_2_to_5_recipients ///
    + emails_sent_1_recipient

* One column per outcome: WFH plus linear week trend, customer-team and employee
* dummies, SEs clustered by employee. The first column creates the file, the
* remaining ones are appended in the order listed.
local outcomes internal_network_size external_network_size ///
               networking_outside_organization networking_outside_company ///
               meeting_hours_with_manager meeting_hours_with_manager_1_on ///
               manager_coaching_hours_1_on_1 all_emails
local write_mode replace
foreach outcome of local outcomes {
    reg `outcome' WFH time i.customer_team i.id, cluster(id)
    outreg2 using WPA_networking_id.tex, `write_mode' keep(WFH time)
    local write_mode append
}

*******************************************************************************************
* Appendix Table A.7: Shift in Working Patterns due to WFH with Change in Trend
*******************************************************************************************

* Interaction that lets the week trend differ between the pre- and post-WFH period.
generate WFHXtime = WFH*time

* One column per working-pattern outcome; the first column creates the file and
* the remaining ones are appended.
local write_mode replace
foreach outcome in working_hours after_hours total_focus_hours collaboration_hours {
    reg `outcome' time WFH WFHXtime i.customer_team i.id, cluster(id)
    outreg2 using WPA_outcomes_time_id.tex, `write_mode' keep(time WFH WFHXtime)
    local write_mode append
}

*******************************************************
* Appendix Table A.8: Networking with Change in Trend
*******************************************************

* Networking and meeting outcomes with a trend break (WFH, time and their
* interaction), customer-team and employee dummies, SEs clustered by employee.
* The first column creates WPA_networking_id2.tex; the others are appended.
local outcomes internal_network_size external_network_size ///
               networking_outside_organization networking_outside_company ///
               meeting_hours_with_manager meeting_hours_with_manager_1_on ///
               manager_coaching_hours_1_on_1 all_emails
local write_mode replace
foreach outcome of local outcomes {
    reg `outcome' WFH time WFHXtime i.customer_team i.id, cluster(id)
    outreg2 using WPA_networking_id2.tex, `write_mode' keep(WFH time WFHXtime)
    local write_mode append
}

**************************************************************
* Table A.3: Pairwise Correlation of Meeting-Related Variables
**************************************************************

* calls is not created anywhere before this point in the do-file; it is only built
* (as scheduled plus unscheduled calls) for the calls figure further down, after
* the data have been reloaded. Construct it here if it is not already in memory so
* that the correlation table runs in a top-to-bottom execution.
capture confirm variable calls, exact
if _rc {
    gen calls=total_unscheduled_calls+total_scheduled_calls
}

* Pairwise correlations of the meeting-related variables; stars mark p<0.01.
* NOTE(review): meet_hours is not created in the visible part of this do-file;
* presumably it is already stored in WFH_WPA_data.dta -- confirm.
pwcorr total_focus_hours meet_hours meeting_hours_with_manager_1_on meeting_hours_with_manager manager_coaching_hours_1_on_1 calls, star(0.01)

******************************************************************
* Table A.4: Pairwise Correlation of Networking-Related Variables.
******************************************************************


* Pairwise correlations of the networking-related variables; stars mark p<0.01.
pwcorr internal_network_size ///
       external_network_size ///
       networking_outside_organization ///
       networking_outside_company ///
       all_emails, ///
       star(0.01)

*******************************************
* Table 9: Productivity in the WPA sample
*******************************************

use WFH_merged_data_also_TIME.dta, clear

* Trend interaction and the alternative productivity measure: KPIValue divided by
* four times working_hours.
generate monthXWFH = monthnumbercentered*WFH
generate productivity_B = KPIValue/(4*working_hours)

* All columns share customer-team and employee dummies and employee-clustered SEs.
local fixed_effects i.customer_team i.id

* Column 1: the productivity variable stored in the data.
reg productivity WFH `fixed_effects', cluster(id)
outreg2 using productivity.tex, replace keep(WFH)

* Column 2: the alternative measure productivity_B.
reg productivity_B WFH `fixed_effects', cluster(id)
outreg2 using productivity.tex, append keep(WFH)

* Column 3: adds the centered month trend and its WFH interaction; only the WFH
* coefficient is written to the table.
reg productivity_B WFH monthnumbercentered monthXWFH `fixed_effects', cluster(id)
outreg2 using productivity.tex, append keep(WFH)


*******************************************************************************************************
*Table 10: Variables selected by Lasso and Elasticity of Productivity with respect to selected variable
*******************************************************************************************************

* Indicator for positive productivity_dm.
* NOTE(review): observations with a system-missing productivity_dm are coded 0
* rather than missing -- confirm this is intended.
generate productivity_dummy = (productivity_dm>0 & productivity_dm!=.)

* Candidate predictors offered to the lasso in both periods.
local candidates focus_dummy working_dummy after_dummy meet_dummy calls_dummy ///
                 mails_dummy ext_network_dummy int_network_dummy network_EXT_dummy ///
                 network_ORG_dummy meets_manager_dummy meets_1on_dummy coaching_dummy

* NOTE(review): no rseed() is passed to lasso, so the cross-validation folds depend
* on the state of the random-number generator at this point -- confirm the
* selection is reproducible.

* --- Pre-WFH period (WFH==0) ---
lasso linear productivity `candidates' if WFH==0 , selection(adaptive)
lassocoef

* The regressor list below is typed in by hand (see lassocoef output above).
local selected_pre focus_dummy working_dummy ext_network_dummy network_EXT_dummy ///
                   network_ORG_dummy meets_1on_dummy
reg productivity_dummy `selected_pre' if WFH==0
margins, eyex(`selected_pre')

* --- WFH period (WFH==1) ---
lasso linear productivity `candidates' if WFH==1, selection(adaptive)
lassocoef

* The regressor list below is typed in by hand (see lassocoef output above).
local selected_post focus_dummy mails_dummy ext_network_dummy int_network_dummy ///
                    network_EXT_dummy network_ORG_dummy coaching_dummy
reg productivity_dummy `selected_post' if WFH==1
margins, eyex(`selected_post')


******************************************************************************************************
*Appendix Figure B.1: Kernel density estimates of subjective Ratings for different levels of Output
******************************************************************************************************

* Nonparametric kernel regression of PerformanceRatings on KPIValue, evaluated at
* KPIValue = 95, 96, ..., 105 and plotted.
npregress kernel PerformanceRatings KPIValue
margins, at(KPIValue = (95/105))
marginsplot, ///
    xtitle("Output") ///
    title("Kernel regression of Performance Ratings")

********************************************************************************************************************************
* Appendix Figure B.2: Working Hours over time with different stages of lockdown and removal of lockdown restrictions in India.
********************************************************************************************************************************

* this is the same Figure as Figure 3a; just now lockdown stages are added manually as vertical lines.

*********************************************************************************
*Appendix Figure B.3: Productivity during WFH depending on pre-WFH productivity.
*********************************************************************************

* Mean productivity per employee within each WFH state.
bysort id WFH: egen proddd = mean(productivity)

* Split that mean into a pre-WFH and a post-WFH column. Rows of the other period
* come out as 0; for the pre column those zeros are turned into missing so that
* they do not enter the employee-level average below.
generate pre_WFH_pro = proddd*(1-WFH)
generate post_WFH_pro = proddd*WFH
replace pre_WFH_pro = . if pre_WFH_pro==0

* Carry each employee's pre-WFH mean to all of that employee's rows.
bysort id: egen productivity_pre = mean(pre_WFH_pro)

* Scatter of post- against pre-WFH productivity with the 45-degree line; values of
* 3 or above and the zero rows of the post column are excluded from the plot.
local plot_sample productivity_pre<3 & post_WFH_pro<3 & post_WFH_pro!=0
twoway ///
    (scatter post_WFH_pro productivity_pre if `plot_sample', sort) ///
    (line productivity_pre productivity_pre if `plot_sample', sort)

****************************************************
* Figure B.4: Technological shift pre- and post WFH
****************************************************

use WFH_WPA_data.dta, clear

* The week index time is built in memory right after the first load of this file
* and is never saved back to it, so it appears not to be stored in
* WFH_WPA_data.dta (the earlier "gen time=0" would fail otherwise). Rebuild it
* with the same date-to-week mapping unless it is already present; without this
* the "sort time" below stops a top-to-bottom run.
capture confirm variable time, exact
if _rc {
    gen time=0
    * Pre-switch weeks, most recent first: -1, -2, ..., -10.
    local pre_weeks 08/03/2020 01/03/2020 23/02/2020 16/02/2020 09/02/2020 ///
                    02/02/2020 26/01/2020 19/01/2020 12/01/2020 05/01/2020
    local wk = 0
    foreach d of local pre_weeks {
        local --wk
        replace time=`wk' if date=="`d'"
    }
    * Post-switch weeks in chronological order: 1, 2, ..., 24.
    local post_weeks 22/03/2020 29/03/2020 05/04/2020 12/04/2020 19/04/2020 26/04/2020 ///
                     03/05/2020 10/05/2020 17/05/2020 24/05/2020 31/05/2020 07/06/2020 ///
                     14/06/2020 21/06/2020 28/06/2020 05/07/2020 12/07/2020 19/07/2020 ///
                     26/07/2020 02/08/2020 09/08/2020 16/08/2020 23/08/2020 30/08/2020
    local wk = 0
    foreach d of local post_weeks {
        local ++wk
        replace time=`wk' if date=="`d'"
    }
}

* B4a
* Total e-mails sent: sum over the five recipient-count buckets.
gen all_emails=emails_sent_to_50_or_more_recipi+emails_sent_11_to_50_recipients+emails_sent_6_to_10_recipients+emails_sent_2_to_5_recipients+emails_sent_1_recipient

* Weekly mean, standard deviation and non-missing count.
sort time
by time: egen mean_emails=mean(all_emails)
by time: egen sd_emails=sd(all_emails)
by time: egen n_emails=count(all_emails)

* 95% confidence band: mean +/- t(n-1, 0.025) * sd/sqrt(n).
gen upper=mean_emails+invttail(n_emails-1,0.025)*(sd_emails/sqrt(n_emails))
gen lower=mean_emails-invttail(n_emails-1,0.025)*(sd_emails/sqrt(n_emails))

twoway (connected mean_emails time, sort xline(0, lcolor(black)) xlabel(-10(2)24) ytitle("Emails sent") xtitle("Week")) (rcap upper lower time, lcolor(black) lwidth(vvthin)), leg(off)

graph save emails.gph, replace
graph export emails.png, replace


drop upper lower

* B4b

sort time

* Total calls: unscheduled plus scheduled.
gen calls=total_unscheduled_calls+total_scheduled_calls
by time: egen mean_calls=mean(calls)
by time: egen sd_calls=sd(calls)
by time: egen n_calls=count(calls)


* 95% confidence band: mean +/- t(n-1, 0.025) * sd/sqrt(n).
gen upper=mean_calls+invttail(n_calls-1,0.025)*(sd_calls/sqrt(n_calls))
gen lower=mean_calls-invttail(n_calls-1,0.025)*(sd_calls/sqrt(n_calls))


twoway (connected mean_calls time, sort xline(0, lcolor(black)) xlabel(-10(2)24) ytitle("Number of Calls") xtitle("Week")) (rcap upper lower time, lcolor(black) lwidth(vvthin)), leg(off)

graph save calls.gph, replace
graph export calls.png, replace

drop upper lower

************************************************************************
* Appendix Figure B.5: Productivity pre- and post WFH in the WPA sample
************************************************************************

use WFH_merged_data_also_TIME.dta, clear

* Alternative productivity measure (used by panel B5b): KPIValue divided by four
* times working_hours.
generate productivity_B = KPIValue/(4*working_hours)

* B5a

* Standardize productivity over the whole sample (mean 0, sd 1).
egen zproductivity = std(productivity)

* Weekly mean, standard deviation and non-missing count of the standardized measure.
bysort time: egen mean_productivity = mean(zproductivity)
by time: egen sd_productivity = sd(zproductivity)
by time: egen n_productivity = count(zproductivity)

* 95% confidence band: mean +/- t(n-1, 0.025) * sd/sqrt(n).
generate upper = mean_productivity+invttail(n_productivity-1,0.025)*(sd_productivity/sqrt(n_productivity))
generate lower = mean_productivity-invttail(n_productivity-1,0.025)*(sd_productivity/sqrt(n_productivity))

* Connected weekly means with capped CI spikes; vertical line at week 0.
twoway ///
    (connected mean_productivity time, sort xline(0, lcolor(black)) xlabel(-10(2)24) ylabel(-0.1(0.05)0.1) ytitle("Productivity (Sapience)") xtitle("Week")) ///
    (rcap upper lower time, lcolor(black) lwidth(vvthin)), ///
    leg(off)

graph save productivity_A.gph, replace
graph export productivity_A.png, replace

* Free the band names for the next panel.
drop upper lower

*B5b

* Standardize the alternative productivity measure over the whole sample.
egen zproductivity_B = std(productivity_B)

* Weekly mean, standard deviation and non-missing count of the standardized measure.
bysort time: egen mean_productivity_B = mean(zproductivity_B)
by time: egen sd_productivity_B = sd(zproductivity_B)
by time: egen n_productivity_B = count(zproductivity_B)

* 95% confidence band: mean +/- t(n-1, 0.025) * sd/sqrt(n).
generate upper = mean_productivity_B+invttail(n_productivity_B-1,0.025)*(sd_productivity_B/sqrt(n_productivity_B))
generate lower = mean_productivity_B-invttail(n_productivity_B-1,0.025)*(sd_productivity_B/sqrt(n_productivity_B))

* Connected weekly means with capped CI spikes; vertical line at week 0.
twoway ///
    (connected mean_productivity_B time, sort xline(0, lcolor(black)) xlabel(-10(2)24) ylabel(-0.2(0.1)0.2) ytitle("Productivity (WPA)") xtitle("Week")) ///
    (rcap upper lower time, lcolor(black) lwidth(vvthin)), ///
    leg(off)

graph save productivity_B.gph, replace
graph export productivity_B.png, replace

* --- end of do file ---
